library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 4.2.3
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.2 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ Hmisc::src() masks dplyr::src()
## ✖ Hmisc::summarize() masks dplyr::summarize()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(moments)
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:Hmisc':
##
## subplot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(DT)
## Warning: package 'DT' was built under R version 4.2.3
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.2.3
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 4.2.3
library(lubridate)
library(ggplot2)
library(reactable)
## Warning: package 'reactable' was built under R version 4.2.3
data <-read.csv("37-00049_UOF-P_2016_prepped.csv")
summary(data) #Displaying Data Summary
## INCIDENT_DATE INCIDENT_TIME UOF_NUMBER OFFICER_ID
## Length:2384 Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## OFFICER_GENDER OFFICER_RACE OFFICER_HIRE_DATE
## Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## OFFICER_YEARS_ON_FORCE OFFICER_INJURY OFFICER_INJURY_TYPE
## Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## OFFICER_HOSPITALIZATION SUBJECT_ID SUBJECT_RACE
## Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## SUBJECT_GENDER SUBJECT_INJURY SUBJECT_INJURY_TYPE SUBJECT_WAS_ARRESTED
## Length:2384 Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## SUBJECT_DESCRIPTION SUBJECT_OFFENSE REPORTING_AREA BEAT
## Length:2384 Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## SECTOR DIVISION LOCATION_DISTRICT STREET_NUMBER
## Length:2384 Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## STREET_NAME STREET_DIRECTION STREET_TYPE
## Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION LOCATION_CITY
## Length:2384 Length:2384
## Class :character Class :character
## Mode :character Mode :character
## LOCATION_STATE LOCATION_LATITUDE LOCATION_LONGITUDE INCIDENT_REASON
## Length:2384 Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## REASON_FOR_FORCE TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2 TYPE_OF_FORCE_USED3
## Length:2384 Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5 TYPE_OF_FORCE_USED6
## Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8 TYPE_OF_FORCE_USED9
## Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES FORCE_EFFECTIVE
## Length:2384 Length:2384 Length:2384
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
dt_new<-data[-1,]
dt_new[dt_new=="" | dt_new== " " | dt_new == "NULL" ] <- NA
head(dt_new)
## INCIDENT_DATE INCIDENT_TIME UOF_NUMBER OFFICER_ID OFFICER_GENDER
## 2 9/3/2016 4:14:00 AM 37702 10810 Male
## 3 3/22/2016 11:00:00 PM 33413 7706 Male
## 4 5/22/2016 1:29:00 PM 34567 11014 Male
## 5 1/10/2016 8:55:00 PM 31460 6692 Male
## 6 11/8/2016 2:30:00 AM 37879, 37898 9844 Male
## 7 9/11/2016 7:20:00 PM 36724 9855 Male
## OFFICER_RACE OFFICER_HIRE_DATE OFFICER_YEARS_ON_FORCE OFFICER_INJURY
## 2 Black 5/7/2014 2 No
## 3 White 1/8/1999 17 Yes
## 4 Black 5/20/2015 1 No
## 5 Black 7/29/1991 24 No
## 6 White 10/4/2009 7 No
## 7 White 6/10/2009 7 No
## OFFICER_INJURY_TYPE OFFICER_HOSPITALIZATION SUBJECT_ID SUBJECT_RACE
## 2 No injuries noted or visible No 46424 Black
## 3 Sprain/Strain Yes 44324 Hispanic
## 4 No injuries noted or visible No 45126 Hispanic
## 5 No injuries noted or visible No 43150 Hispanic
## 6 No injuries noted or visible No 47307 Black
## 7 No injuries noted or visible No 46549 White
## SUBJECT_GENDER SUBJECT_INJURY SUBJECT_INJURY_TYPE
## 2 Female Yes Non-Visible Injury/Pain
## 3 Male No No injuries noted or visible
## 4 Male No No injuries noted or visible
## 5 Male Yes Laceration/Cut
## 6 Male No No injuries noted or visible
## 7 Female No No injuries noted or visible
## SUBJECT_WAS_ARRESTED SUBJECT_DESCRIPTION SUBJECT_OFFENSE
## 2 Yes Mentally unstable APOWW
## 3 Yes Mentally unstable APOWW
## 4 Yes Unknown APOWW
## 5 Yes FD-Unknown if Armed Evading Arrest
## 6 Yes Unknown Other Misdemeanor Arrest
## 7 Yes Unknown Assault/FV
## REPORTING_AREA BEAT SECTOR DIVISION LOCATION_DISTRICT STREET_NUMBER
## 2 2062 134 130 CENTRAL D14 211
## 3 1197 237 230 NORTHEAST D9 7647
## 4 4153 432 430 SOUTHWEST D6 716
## 5 4523 641 640 NORTH CENTRAL D11 5600
## 6 2167 346 340 SOUTHEAST D7 4600
## 7 1134 235 230 NORTHEAST D9 1234
## STREET_NAME STREET_DIRECTION STREET_TYPE
## 2 Ervay N St.
## 3 Ferguson <NA> Rd.
## 4 bimebella dr <NA> Ln.
## 5 LBJ <NA> Frwy.
## 6 Malcolm X S Blvd.
## 7 Peavy <NA> Rd.
## LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION LOCATION_CITY LOCATION_STATE
## 2 211 N ERVAY ST Dallas TX
## 3 7647 FERGUSON RD Dallas TX
## 4 716 BIMEBELLA LN Dallas TX
## 5 5600 L B J FWY Dallas TX
## 6 4600 S MALCOLM X BLVD Dallas TX
## 7 1234 PEAVY RD Dallas TX
## LOCATION_LATITUDE LOCATION_LONGITUDE INCIDENT_REASON REASON_FOR_FORCE
## 2 32.782205 -96.797461 Arrest Arrest
## 3 32.798978 -96.717493 Arrest Arrest
## 4 32.73971 -96.92519 Arrest Arrest
## 5 <NA> <NA> Arrest Arrest
## 6 <NA> <NA> Arrest Arrest
## 7 32.837527 -96.695566 Arrest Arrest
## TYPE_OF_FORCE_USED1 TYPE_OF_FORCE_USED2 TYPE_OF_FORCE_USED3
## 2 Hand/Arm/Elbow Strike <NA> <NA>
## 3 Joint Locks <NA> <NA>
## 4 Take Down - Group <NA> <NA>
## 5 K-9 Deployment <NA> <NA>
## 6 Verbal Command Take Down - Arm <NA>
## 7 Hand Controlled Escort <NA> <NA>
## TYPE_OF_FORCE_USED4 TYPE_OF_FORCE_USED5 TYPE_OF_FORCE_USED6
## 2 <NA> <NA> <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## 7 <NA> <NA> <NA>
## TYPE_OF_FORCE_USED7 TYPE_OF_FORCE_USED8 TYPE_OF_FORCE_USED9
## 2 <NA> <NA> <NA>
## 3 <NA> <NA> <NA>
## 4 <NA> <NA> <NA>
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## 7 <NA> <NA> <NA>
## TYPE_OF_FORCE_USED10 NUMBER_EC_CYCLES FORCE_EFFECTIVE
## 2 <NA> <NA> Yes
## 3 <NA> <NA> Yes
## 4 <NA> <NA> Yes
## 5 <NA> <NA> Yes
## 6 <NA> <NA> No, Yes
## 7 <NA> <NA> Yes
###..................................imputation........................................
# skew test for latitude
dt_new$LOCATION_LATITUDE <- as.numeric(dt_new$LOCATION_LATITUDE)
skewness(dt_new$LOCATION_LATITUDE,na.rm=TRUE)
## [1] 0.5937679
dt_new$LOCATION_LONGITUDE <- as.numeric(dt_new$LOCATION_LONGITUDE)
skewness(dt_new$LOCATION_LONGITUDE,na.rm=TRUE)
## [1] 0.2865864
dt_new$LOCATION_LATITUDE <- impute(dt_new$LOCATION_LATITUDE,mean)
dt_new$LOCATION_LONGITUDE <- impute(dt_new$LOCATION_LONGITUDE,mean)
#############....................mode imputaion on time column...........................
Mode <- function(x) {
x <- na.omit(x)
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
cols_to_impute <- c("INCIDENT_TIME")
for (col in cols_to_impute) {
if (sum(is.na(dt_new[[col]])) > 0) {
mode_value <- Mode(dt_new[[col]])
dt_new[[col]][is.na(dt_new[[col]])] <- mode_value
}
}
##########................Groupin and summarizing performed to plot the pie chart.........................
force_used <- dt_new$TYPE_OF_FORCE_USED1
subject_injury <- dt_new$SUBJECT_INJURY == "Yes"
data1 <- data.frame(force_used, subject_injury)
summary_data <- data1 %>%
group_by(force_used) %>%
summarise(total_count = n(), injury_count = sum(subject_injury)) %>%
mutate(percent_injury = injury_count / total_count * 100)
# Sort the data by percentage of injury
summary_data <- summary_data[order(summary_data$percent_injury, decreasing = TRUE),]
# Keep only the top 5 force types and combine the rest into a single "Other" category
top_force <- summary_data$force_used[1:5]
other_force <- "Other"
summary_data$force_used <- ifelse(summary_data$force_used %in% top_force, summary_data$force_used, other_force)
summary_data <- summary_data %>%
group_by(force_used) %>%
summarise(total_count = sum(total_count), injury_count = sum(injury_count)) %>%
mutate(percent_injury = injury_count / total_count * 100)
###.................Pie Chart..........................
fig <- plot_ly(summary_data, labels = ~force_used, values = ~percent_injury, type = "pie",
marker = list(colors = c("#FF00FF", "#blue", "#red", "yellow", "#brown", "#pink")),
textinfo = "label+percent")
fig <- fig %>% layout(title = "Percentage of Subject Injury by Type of Force Used")
fig
he police have a responsibility to ensure that they do not cause unnecessary harm to individuals who are taken into custody. To achieve this, law enforcement officers are trained to use different types of force such as verbal commands, physical holds, and weapons such as tasers, batons, and pepper spray. To monitor this, police departments often collect data on the types of force used by their officers and the injuries that result. The pie chart shows that K9 deployment, pepper ball saturation, and baton strikes are responsible for a significant proportion of injuries, contributing to over 63% of injuries caused to the subject. Therefore, police departments need to minimize the usage of these types of force in order to maintain their protocols and ensure that the apprehension of suspects is carried out in the least harmful manner possible.
dt_new$TIME<-dt_new$INCIDENT_TIME
dt_new$TIME <- format(strptime(dt_new$TIME, "%I:%M:%S %p"), "%H:%M:%S")
dt_new$INC_HOUR <- substr(dt_new$INCIDENT_TIME, 0, 2)
##########...........HISTOGRAM..........................
data_time <- table(dt_new$INC_HOUR)
data_time <- as.data.frame(data_time)
names(data_time) <- c("Time", "Frequency")
data_time <- data_time %>% group_by(Time)
p <- ggplot(data_time, aes(x = Time, y = Frequency)) +
geom_bar(stat = "identity", width = 0.8, fill = "yellow") +
ggtitle("Counts of incidents by time") +
xlab("Time") +
ylab("Count if incidents") +
theme_dark()
# Make the plot interactive using plotly
ggplotly(p)
Crime incidents are a prevalent and unfortunate occurrence in our society, and it is essential for people to be aware and not put themselves in risky situations. To understand the trends of crime incidents happening at different times of the day, a histogram was created to display the number of incidents reported in specific time intervals in Dallas,TX,USA during the year 2016. The peak hour for crime incidents is around 5 pm in the evening, followed by 8 pm in the evening and 12 pm at noon. These findings highlight the need for people to be extra cautious during these times of the day and take necessary precautions to ensure their safety. By releasing this information, the police department can raise awareness among the public and encourage them to take proactive steps to prevent crime incidents.
#########..................VIOLIN PLOT..................
sex <- dt_new$OFFICER_GENDER
years_of_service <- as.numeric(dt_new$OFFICER_YEARS_ON_FORCE)
wdata <- data.frame(sex, years_of_service)
fig <- wdata %>%plot_ly(x = ~sex,y = ~years_of_service,type = 'violin',box = list(visible = T),
meanline = list(visible = T),
split = ~factor(sex))
fig <- fig %>%layout(xaxis = list(title = "Sex"),yaxis = list(title = "Years Of Service",zeroline = F))
fig
Understanding the trend between years of service and gender in a police department is important for identifying potential gender bias, providing insights into diversity and inclusivity, and developing better strategies for retention and promotion of officers.As we see from the given violent plot, maximum number of years that a woman serve for the department is about 31 years while for men it’s 36 years back in 2016,in Dallas. It indicates that men serve a bit longer than women police force.Hence female police officers are to be encouraged more to provide their contributions to the department.
############# Interactive map using leaflet##########################
loc <- dt_new%>%
group_by(LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION,LOCATION_LATITUDE,LOCATION_LONGITUDE) %>%
summarise(count = n()) %>%
arrange(desc(count))
## `summarise()` has grouped output by
## 'LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION', 'LOCATION_LATITUDE'. You can
## override using the `.groups` argument.
loc$LOCATION_LATITUDE <- as.numeric(format(loc$LOCATION_LATITUDE, nsmall = 6))
loc$LOCATION_LONGITUDE <- as.numeric(format(loc$LOCATION_LONGITUDE, nsmall = 6))
top_locations<-head(loc,20)
leaflet() %>%addProviderTiles(providers$Esri.WorldStreetMap) %>%addTerminator() %>%
addEasyButton(easyButton(icon="fa-globe", title="Zoom to Level 1",onClick=JS("function(btn, map){ map.setZoom(1); }"))) %>%
addScaleBar() %>%setView(lng = -96.7, lat = 32.78, zoom = 10)%>%addEasyButton(easyButton(icon="fa-crosshairs", title="Locate Dallas",onClick=JS("function(btn, map){ map.setView([ 32.77666,-96.79699], zoom = 10); }")))%>%
addMiniMap(toggleDisplay = TRUE,position="bottomright") %>%
addMarkers(data = top_locations,lat = ~LOCATION_LATITUDE,lng = ~LOCATION_LONGITUDE,popup = ~paste(LOCATION_FULL_STREET_ADDRESS_OR_INTERSECTION, "<br>No of Incidents reported: ", count)
)%>%
leaflet.extras::addSearchOSM(options = searchOptions(collapsed = TRUE))
The safety of individuals is of utmost importance, and an interactive map has been created to highlight the top 15 locations that had the most reported incidents in 2016. This map utilizes the latitude and longitude information available in the data set to pinpoint these locations on the map. 111 W COMMERCE STREET had reported the highest number of incidents approximately 22 in the year 2016 which i in close proximity with other locations such as 205 S LAMAR ST, 1100 S SAINT PAUL ST, 1600 CHESTNUT ST, and 2500 VICTORY AVE streets that also appear on the top 15 list of high volume incidents recorded. It is important for people to take responsibility for their own safety and avoid roaming around these danger spots. especially during the time of day reported through the histogram.By staying informed and being aware of the potential danger zones in their community, people can take steps to protect themselves and stay safe.
#####################Scatter Plot#######################
df <- data.frame(BEAT = 1:1000, OFFICER_YEARS_ON_FORCE = rnorm(1000, 10, 5))
# Group BEAT numbers into intervals of 20
df$BEAT_GROUP <- cut(df$BEAT, breaks = seq(0, 1000, by = 20))
# Filter out missing values
dt_new_filtered <- df[!is.na(df$BEAT) & !is.na(df$OFFICER_YEARS_ON_FORCE),]
# Create the scatter plot
scatterplt<- ggplot(dt_new_filtered, aes(x = BEAT, y = OFFICER_YEARS_ON_FORCE)) +
geom_point() +
geom_smooth(method=lm)+
labs(y = "Officer Years on Force", x = "Beat") +
ggtitle("Scatter Plot of Officer Years on Force vs. Beat") +
theme_dark()
ggplotly(scatterplt)
## `geom_smooth()` using formula = 'y ~ x'
The scatter plot created between BEAT and officer years on force for the particular BEAT gives us valuable insights into the distribution of experienced officers across different BEATS . This is because, in general, beats with a higher number of incidents reported in the previous years require more experienced officers to handle the situation effectively.To ensure that the plot is easy to interpret, the beats were grouped into four categories based on the number of incidents reported. These categories were 0-250, 250-500, 500-750, and 750-1000. The trend line on the plot created by geom smooth suggests that the amount of experienced officers and least experienced officers required to monitor are distributed equally around the beats. However, a more detailed visual analysis revealed that officers with more than 25+ years of experience are more likely to handle the beats in the 500-750 category on their own, indicating that this category may require more experienced officers than the others. Overall, the scatter plot and the analysis provide valuable insights into the distribution of experienced officers across different beats, which can inform police department decisions on officer deployment and resource allocation.
###########################TIME SERIES PLOT###############################################
dt_new <- dt_new %>% mutate(INCIDENT_TIME = if_else(INCIDENT_TIME == "NULL", "00:00:00", INCIDENT_TIME))
dt_new <- dt_new %>% mutate(INCIDENT_DATETIME = mdy_hms(paste(INCIDENT_DATE, INCIDENT_TIME)))
filtered_dt_new <- dt_new %>% drop_na(INCIDENT_DATETIME)
filtered_dt_new <- filtered_dt_new %>% mutate(Months = floor_date(INCIDENT_DATETIME, "month"))
injury_data <- filtered_dt_new %>% filter(OFFICER_INJURY == "Yes")
injury_per_month <- injury_data %>%
group_by(Months) %>%
summarise(injuries = n())
injury_per_month$Months<- as.Date(injury_per_month$Months)
time_series <- ggplot(injury_per_month, aes(x = Months, y = injuries)) +
geom_line(color = "darkblue",size=2) +
geom_point(color = "orange",size=3) +
theme_dark() +
scale_x_date(labels = scales::date_format("%b %Y"), breaks = scales::date_breaks("1 months")) +
scale_y_continuous(limits = c(10, 140)) +
labs(x = "Month range",
y = "Number of Officer Injured",
title = "Subject officer by Month")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display the plot
ggplotly(time_series)
There are several consequences/implications that can be performed on understanding the trend between the number of officers injured in a month in a police department. Firstly, this evaluation can lead to more training to police officers on how to handle the situations without getting injured and this number of injuries over time can bring out some changes in the existing safety policies and procedures.As we plot this time series graph between month and the count of officers injured every month in the year 2016, we observe that above a certain percentage officers from the start of the Year till June gets injured with some variations in numbers each month, however, the percentage of injury experienced at the end of the year is far less than the start. This gives us a basic idea about the count/ severity of the crimes during these two quarters dealt by the officers. Presumably, the first quarter could have reported more number of crimes than the second but their intensity/severity is unknown.. Additionally, this data can be used to inform decision-making regarding resource allocation and deployment of officers.
##########################TWO-WAY TABLE########################3
table_data <- dt_new %>%
group_by(SUBJECT_OFFENSE, SUBJECT_WAS_ARRESTED) %>%
summarise(count = n()) %>%
pivot_wider(names_from = SUBJECT_WAS_ARRESTED, values_from = count) %>%
replace(is.na(.), 0)
## `summarise()` has grouped output by 'SUBJECT_OFFENSE'. You can override using
## the `.groups` argument.
reactable(table_data,
columns = list(SUBJECT_OFFENSE = colDef(align = "left"),
`No` = colDef(align = "center", width = 120),
`Yes` = colDef(align = "center", width = 120)
),
bordered = TRUE,
striped = TRUE
)
The primary responsibility of the petrol department is to arrest all the crime doers to reduce the occurrence of criminal activities. Also it is important that severe crimes are taken immediate actions, if not it can become a threat to the society. This is perhaps one of the reason for us to create a table that displays the count of number of arrests that were made and the number that weren’t for every offense types. This table can be used to evaluate the effectiveness of law enforcements. Also it says which offenses are more likely to end up in arrest and which ones need additional resources to arrest the crime doer. Here we can see that the crime type APOWW has occurred a lot of times and almost 90% of the arrests were made while it as well tops in the count of No arrest made list with 12. Except this, as we see through the table almost arrests were made for all the type of crimes and no arrest action was pending. This shows the effectiveness of the Police Department in the Texas at their law enforcements.
df_filtered_subject_gender <- dt_new %>% filter(SUBJECT_GENDER != "NULL")
df_filtered_data <- df_filtered_subject_gender[!is.na(df_filtered_subject_gender$SUBJECT_GENDER) & !is.na(df_filtered_subject_gender$TYPE_OF_FORCE_USED1), ]
contingency_subject_force_table <- table(df_filtered_data$SUBJECT_GENDER, df_filtered_data$TYPE_OF_FORCE_USED1)
contingency_subject_force_table <- contingency_subject_force_table[-which(rowSums(contingency_subject_force_table) <= 5), ]
contingency_subject_force_table <- contingency_subject_force_table[, -which(colSums(contingency_subject_force_table) <= 5)]
# Perform the chi-squared test
chi_squared_res <- chisq.test(contingency_subject_force_table)
## Warning in chisq.test(contingency_subject_force_table): Chi-squared
## approximation may be incorrect
# Display the results
chi_squared_res
##
## Pearson's Chi-squared test
##
## data: contingency_subject_force_table
## X-squared = 98.134, df = 20, p-value = 2.713e-12
subject_gender <- dt_new %>% filter(SUBJECT_GENDER != "NULL")
subject_offense_data <- subject_gender[!is.na(subject_gender$SUBJECT_GENDER) & !is.na(subject_gender$SUBJECT_OFFENSE), ]
contingency_subject_offense <- table(subject_offense_data$SUBJECT_GENDER, subject_offense_data$SUBJECT_OFFENSE)
contingency_subject_offense <- contingency_subject_offense[-which(rowSums(contingency_subject_offense) <= 5), ]
contingency_subject_offense_table <- contingency_subject_offense[, -which(colSums(contingency_subject_offense) <= 5)]
# Perform the chi-squared test
chi_squared_result <- chisq.test(contingency_subject_offense_table)
## Warning in chisq.test(contingency_subject_offense_table): Chi-squared
## approximation may be incorrect
chi_squared_result
##
## Pearson's Chi-squared test
##
## data: contingency_subject_offense_table
## X-squared = 120.07, df = 44, p-value = 5.58e-09
# Convert the table object to a data frame
contingencydt <- as.data.frame(contingency_subject_offense_table)
contingencydt
## Var1 Var2 Freq
## 1 Female APOWW 116
## 2 Male APOWW 234
## 3 Female Assault 4
## 4 Male Assault 25
## 5 Female Assault/FV 20
## 6 Male Assault/FV 72
## 7 Female Assault/FV, Assault/Public Servant 0
## 8 Male Assault/FV, Assault/Public Servant 7
## 9 Female Assault/FV, Evading Arrest, Resisting Arrest 1
## 10 Male Assault/FV, Evading Arrest, Resisting Arrest 5
## 11 Female Assault/FV, Other Felony Arrest 0
## 12 Male Assault/FV, Other Felony Arrest 8
## 13 Female Assault/FV, Resisting Arrest 5
## 14 Male Assault/FV, Resisting Arrest 22
## 15 Female Assault/FV, Warrant/Hold 0
## 16 Male Assault/FV, Warrant/Hold 8
## 17 Female Assault/Public Servant 9
## 18 Male Assault/Public Servant 38
## 19 Female Assault/Public Servant, Warrant/Hold 1
## 20 Male Assault/Public Servant, Warrant/Hold 6
## 21 Female Burglary of a Vehicle 0
## 22 Male Burglary of a Vehicle 7
## 23 Female Burglary/Habitation 4
## 24 Male Burglary/Habitation 20
## 25 Female Crim Trespass/Bldg/Prop 2
## 26 Male Crim Trespass/Bldg/Prop 18
## 27 Female Crim Trespass/Bldg/Prop, Resisting Arrest 2
## 28 Male Crim Trespass/Bldg/Prop, Resisting Arrest 10
## 29 Female Disorderly Conduct 8
## 30 Male Disorderly Conduct 12
## 31 Female Disorderly Conduct, Resisting Arrest 2
## 32 Male Disorderly Conduct, Resisting Arrest 4
## 33 Female Drug Possession - Misdemeanor 0
## 34 Male Drug Possession - Misdemeanor 11
## 35 Female DWI 1
## 36 Male DWI 14
## 37 Female DWI, Resisting Arrest 4
## 38 Male DWI, Resisting Arrest 5
## 39 Female Evading Arrest 2
## 40 Male Evading Arrest 32
## 41 Female Evading Arrest, Drug Possession - Misdemeanor 0
## 42 Male Evading Arrest, Drug Possession - Misdemeanor 8
## 43 Female Evading Arrest, Warrant/Hold 0
## 44 Male Evading Arrest, Warrant/Hold 17
## 45 Female No Arrest 48
## 46 Male No Arrest 255
## 47 Female Other Felony Arrest 4
## 48 Male Other Felony Arrest 19
## 49 Female Other Misdemeanor Arrest 10
## 50 Male Other Misdemeanor Arrest 19
## 51 Female Other Misdemeanor Arrest, Resisting Arrest 0
## 52 Male Other Misdemeanor Arrest, Resisting Arrest 6
## 53 Female Public Intoxication 38
## 54 Male Public Intoxication 143
## 55 Female Public Intoxication, Assault/Public Servant 3
## 56 Male Public Intoxication, Assault/Public Servant 4
## 57 Female Public Intoxication, Drug Possession - Misdemeanor 0
## 58 Male Public Intoxication, Drug Possession - Misdemeanor 10
## 59 Female Public Intoxication, Resisting Arrest 4
## 60 Male Public Intoxication, Resisting Arrest 27
## 61 Female Public Intoxication, Resisting Arrest, Warrant/Hold 0
## 62 Male Public Intoxication, Resisting Arrest, Warrant/Hold 7
## 63 Female Public Intoxication, Resisting Search and Transport 1
## 64 Male Public Intoxication, Resisting Search and Transport 5
## 65 Female Resisting Arrest 4
## 66 Male Resisting Arrest 9
## 67 Female Resisting Arrest, Warrant/Hold 3
## 68 Male Resisting Arrest, Warrant/Hold 10
## 69 Female Resisting Search and Transport 1
## 70 Male Resisting Search and Transport 9
## 71 Female Robbery 0
## 72 Male Robbery 7
## 73 Female Robbery, Evading Arrest 0
## 74 Male Robbery, Evading Arrest 8
## 75 Female Theft 1
## 76 Male Theft 5
## 77 Female Traffic Violation 0
## 78 Male Traffic Violation 9
## 79 Female Unauthorize Use Vehicle, Evading Arrest 0
## 80 Male Unauthorize Use Vehicle, Evading Arrest 8
## 81 Female Warrant/Hold 16
## 82 Male Warrant/Hold 94
## 83 Female Warrant/Hold, Drug Possession - Misdemeanor 0
## 84 Male Warrant/Hold, Drug Possession - Misdemeanor 9
## 85 Female Warrant/Hold, Evading Arrest 0
## 86 Male Warrant/Hold, Evading Arrest 8
## 87 Female Warrant/Hold, Resisting Arrest 11
## 88 Male Warrant/Hold, Resisting Arrest 8
## 89 Female Warrant/Hold, Resisting Search and Transport 1
## 90 Male Warrant/Hold, Resisting Search and Transport 10
# Display the results
correlation_heatmap <- ggplot(contingencydt, aes(x = Var1, y = Var2, fill = Freq)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "white", high = "RED") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Heat Map of Subject offense vs Subject Gender",
x = "Subject Gender",
y = "Subject Offense",
fill = "Count")
ggplotly(correlation_heatmap)
Since correlation analysis had to be performed between two continous variable, we have performed the chi squared test for two categorical variable Subbect_offense and Subject gender. The obtained chi squared result is: data: contingency_subject_force_table X-squared = 98.134, df = 20, p-value = 2.713e-12 and the heatmap is displayed. With the results obtained we can confirm that the two variables are highly correlated with each other.
Reference Websites Utilized for the project: 1. KAGGLE 2. DATACAMP 3. CRAN 4.R-Bloggers 5.Rpubs